require(tidyverse)
require(cowplot)
require(broom)
set.seed(12345)
Plotting ASR data for Fig. 1, S1, …
A large set of data were collected and organized by JY and HB into two files, as shown below. Later, we will import more recent datasets. The data formats are not the most consistent. However, we have tried to annotate the headers so as to make the output understandable.
Read in data
tmp <- read_tsv("../input/20220726-ASR-CFU-raw.tsv", col_types = cols(), comment = "#")
raw <- read_csv("../input/20221224-ASR-CFU-raw.csv", col_types = cols(), comment = "#") %>%
mutate(Date = gsub("d(\\d\\d)(\\d\\d)(\\d\\d)", "\\1/\\2/\\3", Date),
Len_1 = recode(Len_1, `2hr` = "2 hr", `45min` = "45 min"),
Len_2 = recode(Len_2, `2hr` = "2 hr")) %>%
select(-`MO/MM`, -`PO/PM`) %>%
bind_rows(add_column(tmp, Experimenter = "JL"))
# data sanity check, quick view
sapply(select(raw, Species, Strain, Genotype, Len_1, Len_2, H2O2), unique)
$Species
[1] "Cg" "Sc" "Kl"
$Strain
[1] "yH001" "yH154" "yH181" "yH149" "yH002" "yH609" "yH610" "yH271" "yH272"
[10] "yH273" "yH262" "yH275"
$Genotype
[1] "wt" "rim15Δ" "cta1Δ" "msn2Δ" "msn4Δ"
[6] "msn2Δmsn4Δ"
$Len_1
[1] "45 min" "2hr" "90 min" "135 min"
$Len_2
[1] "2 hr" "0 min"
$H2O2
[1] "Mock" "20mM" "40mM" "60mM" "80mM" "100mM" "2mM" "4mM"
[9] "6mM" "8mM" "10mM" "15mM" "25mM" "30mM" "35mM" "1.5mM"
[17] "2.5mM" "3mM" "3.5mM" "0.5mM" "1mM" "0.2mM" "0.3mM" "0.4mM"
[25] "0.26mM" "0.34mM" "0 mM" "20 mM" "40 mM" "60 mM" "80 mM" "100 mM"
[33] "2.2 mM" "2.5 mM" "2 mM" "4 mM" "6 mM" "8 mM" "10 mM"
species.label <- c(Sc = "S. cerevisiae", Cg = "C. glabrata")
p.survival <- list(
geom_point(shape = 1, stroke = 1, size = 2,
position = position_jitter(width = 0.15)),
stat_summary(fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5),
facet_wrap(~Species, scales = "free_x", labeller = as_labeller(species.label)),
scale_y_continuous(labels = scales::percent),
ylab("% survival"),
theme_cowplot(line_size = 0.7, font_size = 14),
theme(strip.text = element_text(size = rel(1), face = 3))
)
p.asr <- list(
geom_hline(yintercept = 1, linetype = 2, color = "gray50"),
geom_point(position = position_jitter(width = 0.1), size = 2),
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange", color = "red",
position = position_nudge(x = 0.2)),
facet_wrap(~Species, scales = "free_x", labeller = as_labeller(species.label)),
theme_cowplot(line_size = 0.7),
theme(strip.text = element_text(size = rel(1.1), face = 3))
)
Goal
Experiment
Jinye has performed several experiments at these two concentrations, as shown below
raw %>%
filter(Genotype == "wt", H2O2 %in% c("10mM", "10 mM", "100mM", "100 mM"),
Len_1 == "45 min", Group == "PO") %>%
mutate(group = paste(recode(Strain, yH154 = "Sc", yH001 = "Cg1", yH002 = "Cg2"),
gsub(" ?mM", "", H2O2), sep = "_")) %>%
count(Date, group) %>%
pivot_wider(names_from = group, values_from = n)
We will use the 05/30/20, 06/01/20 and 06/04/20 data, which were three consecutive replicates meant to compare the ASR under -Pi between the two species. The other datasets were part of an experiment with different purposes.
Data
Main dataset:
| Date | Species | Replicate |
|---|---|---|
| 05/30/20 | Sc, Cg | 1 |
| 06/01/20 | Sc, Cg | 2 |
| 06/04/20 | Sc, Cg | 2 |
Note: one replicate for Sc on 05/30/20 showed much lower CFU numbers than the other plates.
filter(raw, Date == "05/30/20", Species == "Sc")
Jinye removed the second replicate for both Sc and Cg from her downstream analyses.
Below are the filtered data:
use.f1 <- paste0(c("06/04", "06/01", "05/30"), "/20")
tmp <- raw %>%
filter(Date %in% use.f1) %>%
mutate(
scaled = Count * Dilutions * 1e-2
) %>%
# remove uninformative columns. only one H2O2 conc used for each species
select(-Strain, -Genotype, -H2O2, -Len_1, -Len_2, -Experimenter)
# Assume the triplicates were paired in the order they appear in the table,
# i.e., the first row in the MO, MM, PO, PM groups belong to the same biological
# replicate, we can derive three ASR_scores for each date x species x Len_1
dat.f1 <- tmp %>%
group_by(Date, Species, Group) %>%
mutate(Repl = row_number()) %>%
# group by primary to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Species, Repl, Primary) %>%
# calculate % survival
mutate(
scaled_M = scaled[Secondary == "M"],
r = num(scaled / scaled[Secondary == "M"], digits = 3)
) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M") %>%
# 5/30/20 replicate 2 is excluded
filter(!(Date == "05/30/20" & Repl == 2))
dat.f1
Plotting
Statistical tests
Basal survival rate
The basal survival rates between species within the same day are not “paired”. We will use a rank-sum test here.
tmp <- dat.f1 %>%
filter(Primary == "M") %>%
pivot_wider(id_cols = c(Date, Repl), names_from = Species, values_from = r)
tmp
with(tmp, t.test(as.numeric(Cg), as.numeric(Sc), paired = FALSE))
Welch Two Sample t-test
data: as.numeric(Cg) and as.numeric(Sc)
t = 0.60672, df = 11.515, p-value = 0.5558
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.005202894 0.009192871
sample estimates:
mean of x mean of y
0.01301413 0.01101914
with(tmp, wilcox.test(Cg, Sc, paired = FALSE))
Wilcoxon rank sum exact test
data: Cg and Sc
W = 41, p-value = 0.3823
alternative hypothesis: true location shift is not equal to 0
basal survival rates are not significantly between species
Primary stress enhanced survival (ASR) in Cg
The comparison between r and r’ is paired. We will use a signed-rank test.
tmp <- dat.f1 %>%
filter(Species == "Cg") %>%
pivot_wider(id_cols = c(Date, Repl), names_from = Primary, values_from = r) %>%
mutate(ASR = P/M)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 3.44, 95% CI by bootstrap = [2.56, 4.33]"
with(tmp, t.test(as.numeric(P), as.numeric(M), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(P) and as.numeric(M)
t = 5.5041, df = 7, p-value = 0.0004513
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.01921734 Inf
sample estimates:
mean difference
0.02930431
with(tmp, wilcox.test(P, M, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: P and M
V = 36, p-value = 0.003906
alternative hypothesis: true location shift is greater than 0
Primary stress no effect in Sc
tmp <- dat.f1 %>%
filter(Species == "Sc") %>%
pivot_wider(id_cols = c(Date, Repl), names_from = Primary, values_from = r) %>%
mutate(ASR = P/M)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 1.27, 95% CI by bootstrap = [0.90, 1.66]"
with(tmp, t.test(as.numeric(P), as.numeric(M), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(P) and as.numeric(M)
t = -0.10757, df = 7, p-value = 0.5413
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
-0.003806367 Inf
sample estimates:
mean difference
-0.0002045084
with(tmp, wilcox.test(P, M, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: P and M
V = 21, p-value = 0.3711
alternative hypothesis: true location shift is greater than 0
Goal
Experiment
Data
Main dataset:
| Species | H2O2 | Description |
|---|---|---|
| C. glabrata | 0, 20, 40, 60, 80, 100 mM | Only M and O, no primary stress |
| S. cerevisiae | 0, 2, 4, 6, 8, 10 mM | Only M and O, no primary stress |
| Date | Species | Replicate |
|---|---|---|
| 03/21/22 | Sc, Cg | 1 |
| 03/22/22 | Sc, Cg | 2 |
| 03/24/22 | Sc, Cg | 3 |
| 03/28/22 | Sc, Cg | 4 |
| 04/01/22 | Sc, Cg | 5 |
use.s1 <- paste0(c("03/21", "03/22", "03/24", "03/28", "04/01"), "/22")
dat.s1 <- raw %>% select(-Experimenter, -Len_1, -Len_2, -Genotype) %>%
mutate(scaled = Count * Dilutions * 1e-3) %>%
filter(Date %in% use.s1) %>%
group_by(Date, Strain) %>%
mutate(r = num(scaled / scaled[Group == "M"], digits = 3)) %>%
select(-Group)
dat.s1
#write_tsv(dat.s1, file = "../input/20230302-fig-s1a-data-hb.tsv")
p <- dat.s1 %>%
mutate(H2O2 = gsub(" mM", "", H2O2),
H2O2 = fct_reorder(H2O2, as.numeric(H2O2))) %>%
filter(H2O2 != "0") %>%
ggplot(aes(x = H2O2, y = r)) +
geom_point(aes(shape = Date)) +
#geom_line(aes(group = Date, color = Date), alpha = 0.8) +
scale_color_brewer(palette = "Dark2") +
scale_shape_manual(values = 1:5, guide = "none") +
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange", color = "red2",
size = 0.8, position = position_nudge(x = 0.3)) +
facet_wrap(~Species, scales = "free_x", labeller = as_labeller(species.label)) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("Basal survial rate (r)") +
theme_cowplot(line_size = 0.7) +
theme(strip.text = element_text(size = rel(1), face = 3))
p
#ggsave("../output/20230101-basal-survival-across-h2o2-range-sc-cg.png", width = 5, height = 3)
Statistical test for difference between species at the highest concentration. Using the Wilcoxon rank-sum test (aka Mann-Whitney’s U test)
tmp <- dat.s1 %>%
filter(H2O2 %in% c("100 mM", "10 mM"))
tmp %>% group_by(Species) %>% summarize(mean = mean(r))
wilcox.test(r ~ Species, data = tmp, paired = FALSE)
Wilcoxon rank sum exact test
data: r by Species
W = 12, p-value = 1
alternative hypothesis: true location shift is not equal to 0
no significant differences at 10 vs 100 mM
Goal
Data
Main dataset:
| Species | H2O2 | Description |
|---|---|---|
| C. glabrata | 0, 20, 40, 60, 80, 100 mM | full ASR experiment |
| S. cerevisiae | 0, 2, 4, 6, 8, 10mM | full ASR experiment |
| Date | Species | Strain | Replicate |
|---|---|---|---|
| 07/16/22 | Cg | yH001,2 | a1 |
| 07/17/22 | Cg | yH001,2 | a2 |
| 07/18/22 | Cg | yH001,2 | a3 |
| 07/29/22 | Cg | yH001 | b1 |
| 07/29/22 | Sc | yH154 | b1 |
| 08/01/22 | Cg | yH001 | b2 |
| 08/01/22 | Sc | yH154 | b2 |
| 08/02/22 | Cg | yH001 | b3 |
| 08/02/22 | Sc | yH154 | b3 |
| 08/04/22 | Cg | yH001 | b4 |
| 08/04/22 | Sc | yH154 | b4 |
| 08/11/22 | Cg | yH001 | b5 |
| 08/11/22 | Sc | yH154 | b5 |
use.s1b <- paste0(c("07/16", "07/17", "07/18", "07/29", "08/01", "08/02", "08/04", "08/11"), "/22")
dat.s1b <- raw %>%
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
mutate(
# arbitrarily decide any CFU counts < 3 are not included due to high CV
Count = ifelse(Count < 3, NA, Count),
scaled = Count * Dilutions * 1e-3
) %>%
filter(Date %in% use.s1b) %>%
group_by(Date, Strain, Primary) %>%
# calculate r and r'
mutate(r = num(scaled / scaled[Secondary == "M"], digits = 3)) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M") %>%
pivot_wider(id_cols = c(Date, Strain, Species, H2O2),
names_from = Primary, values_from = r, names_prefix = "r") %>%
mutate(ASR_score = rP / rM)
dat.s1b
#write_tsv(dat.s1b, file = "../input/20230302-fig-s1b-data-hb.tsv")
p <- dat.s1b %>%
filter(!is.na(ASR_score)) %>%
mutate(H2O2 = gsub(" ?mM", "", H2O2),
H2O2 = fct_reorder(H2O2, as.numeric(H2O2))) %>%
filter(H2O2 != "0") %>%
ggplot(aes(x = H2O2, y = ASR_score)) + p.asr +
#geom_hline(yintercept = 1, linetype = 2, color = "gray50") +
#geom_point(position = position_jitter(width = 0.1), alpha = 0.8) +
#stat_summary(fun.data = "mean_cl_boot", geom = "pointrange", color = "red",
# size = 0.8, position = position_nudge(x = 0.2)) +
#scale_y_log10() +
#facet_wrap(~Species, scales = "free_x", labeller = as_labeller(species.label)) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("ASR score (r'/r)")# +
#theme_cowplot(line_size = 0.7) +
#theme(strip.text = element_text(size = rel(1), face = 3))
p
#ggsave("../output/20230102-ASR-score-across-h2o2-range-sc-cg.png", width = 5, height = 3)
dat.s1b %>%
filter(!is.na(ASR_score)) %>%
mutate(H2O2 = gsub(" ?mM", "", H2O2),
H2O2 = fct_reorder(H2O2, as.numeric(H2O2))) %>%
filter(H2O2 != "0") %>%
group_by(Species, H2O2) %>%
summarise(rM = mean(rM), ASR = mean(ASR_score), sd_asr = sd(ASR_score))
`summarise()` has grouped output by 'Species'. You can override using the `.groups` argument.
Goal
Data
Main dataset:
| Species | Len_noPi | H2O2 | Description |
|---|---|---|---|
| C. glabrata | 45, 90, 135 min | 100 mM | full ASR experiment |
| S. cerevisiae | 45, 90, 135 min | 10mM | full ASR experiment |
| Date | Species | Len_noPi | Strain |
|---|---|---|---|
| 06/06/20 | Cg | 45 min | yH001 |
| 06/06/20 | Sc | 45 min | yH154 |
| 06/06/20 | Cg | 90 min | yH001 |
| 06/06/20 | Sc | 90 min | yH154 |
| 06/06/20 | Cg | 135 min | yH001 |
| 06/06/20 | Sc | 135 min | yH154 |
Supporting dataset:
| Date | Species | Len_noPi | Strain |
|---|---|---|---|
| 05/30/22 | Cg | 45 min | yH001 |
| 05/30/22 | Sc | 45 min | yH154 |
| 06/01/22 | Cg | 45 min | yH001 |
| 06/01/22 | Sc | 45 min | yH154 |
| 06/04/22 | Cg | 45 min | yH001 |
| 06/04/22 | Sc | 45 min | yH154 |
use.s2 <- paste0(c("06/06", "06/04", "06/01", "05/30"), "/20")
tmp <- raw %>%
filter(Date %in% use.s2) %>%
mutate(
# arbitrarily decide any CFU counts < 3 are not included due to high CV
# not in use here. instead, label them in the plot (see below)
# count = ifelse(Count < 3, NA, Count),
scaled = Count * Dilutions * 1e-2
) %>%
# remove uninformative columns. only one H2O2 conc used for each species
select(-Strain, -Genotype, -H2O2, -Len_2, -Experimenter)
Assume the triplicates were paired in the order they appear in the table, i.e., the first row in the MO, MM, PO, PM groups belong to the same biological replicate, we can derive three ASR_scores for each date x species x Len_1
dat.s2 <- tmp %>%
group_by(Date, Species, Len_1, Group) %>%
mutate(Repl = row_number()) %>%
pivot_wider(id_cols = c(Date, Species, Len_1, Repl),
names_from = Group, values_from = scaled, names_sep = "") %>%
mutate(
r = MO/MM,
rp = PO/PM,
ASR_score = rp/r,
low_count = MO < 1 # mark experiments with < 3 counts
)
dat.s2
#write_tsv(dat.s2, file = "../input/20230302-fig-s2-data-hb.tsv")
p <- dat.s2 %>%
#filter(!is.na(ASR_score)) %>%
mutate(len_1 = factor(gsub(" ?min", "", Len_1), levels = c("45", "90", "135"))) %>%
ggplot(aes(x = len_1, y = ASR_score)) + p.asr +
# geom_hline(yintercept = 1, linetype = 2, color = "gray50") +
# geom_point(position = position_jitter(width = 0.1), size = 2) +
# stat_summary(position = position_nudge(x = 0.2),
# fun.data = "mean_cl_boot", geom = "pointrange", color = "red") +
xlab("Length of primary stress (min)") + ylab("ASR score (r'/r)") +
# facet_wrap(~Species, scales = "free_x", labeller = as_labeller(species.label)) +
# theme_cowplot(line_size = 0.7) +
theme(legend.text = element_text(size = rel(1), face = 3),
legend.position = c(0.1, 0.85),
strip.text = element_text(size = rel(1), face = 3))
p
#ggsave("../output/20230103-ASR-score-across-noPi-length-sc-cg.png", width = 5, height = 3)
dat.s2 %>%
#filter(!low_count) %>%
mutate(len_1 = factor(gsub(" ?min", "", Len_1), levels = c("45", "90", "135"))) %>%
group_by(Species, len_1) %>%
summarize(n = n(), ASR = mean(ASR_score), sd_asr = sd(ASR_score), .groups = "drop") %>%
mutate(across(where(is.double), num, sigfig = 3))
Because of the small sample size (3) for 90 and 135 minutes, Wilcoxon signed-rank test doesn’t have power to detect significant differences at any conventional size of the test (lower p-value for a sample size 3 is 0.25). Using t-test below, knowing that the normal assumption is likely not met. Overall, we should focus on the trend and not the statistical significance in this result.
dat.s2 %>%
ungroup() %>%
mutate(len_1 = factor(gsub(" ?min", "", Len_1), levels = c("45", "90", "135"))) %>%
select(Species, len_1, r, rp) %>%
nest(data = c(r, rp)) %>%
mutate(
test = map(data, ~ t.test(.x$rp, .x$r, paired = TRUE, alternative = "g")),
tidied = map(test, tidy)
) %>%
unnest(tidied) %>%
select(Species, len_1, p.value, method, alternative) %>%
mutate(p.bonf = p.adjust(p.value, method = "bonf"),
p.holm = p.adjust(p.value, method = "holm"),
p.hoch = p.adjust(p.value, method = "hoch"),
across(starts_with("p."), round, digits = 5)) %>%
arrange(Species, len_1)
p <- dat.s2 %>%
#filter(!is.na(ASR_score)) %>%
mutate(len_1 = factor(Len_1, levels = c("45 min", "90 min", "135 min"))) %>%
ggplot(aes(x = Species, y = ASR_score)) +
geom_point(#aes(fill = low_count),
position = position_jitter(0.1), size = 1.5, shape = 21) +
stat_summary(fun.data = "mean_se", geom = "pointrange", color = "red") +
#scale_y_log10() +
scale_shape_manual(values = c(21,22), labels = species.label) +
#scale_fill_manual(values = c("white", "gray40"), guide = "none") +
#facet_wrap(~Species, scales = "free_x", labeller = as_labeller(species.label)) +
facet_wrap(~ len_1, scales = "free") +
xlab("Length of primary stress (min)") + ylab("ASR score (r'/r)") +
theme_cowplot(line_size = 0.7)
p
#ggsave("../output/20230103-ASR-score-across-noPi-length-sep-panel.png", width = 6, height = 3)
Goal
Experiment
Data
Main dataset:
| Species | Oxidant | Concentration | #Reps | Dates |
|---|---|---|---|---|
| C. glabrata | tBOOH | 0, 1, 1.5, 2 mM | 6 | 8/23-8/31 |
| C. glabrata | Menadione | 0, 250, 300, 350 uM | 6 | 7/17-7/26 |
| C. glabrata | MSB | 0, 300, 350, 400 mM | 6 | 9/3-9/8 |
Analysis
tmp <- read_tsv("../input/20230901-tBOOH-ASR-CFU-ht.tsv",
col_types = cols(), comment = "#") %>%
mutate(
scaled = Count * Dilutions * 1e-2
)
dat.s3a <- tmp %>%
# remove the phosphate starvation experiments - they were included as positive
# controls, but because there is only one replicate and the secondary stress
# was applied for 3 instead of 2 hours, CFU is extremely low and not conclusive
filter(Len_2 != "3 hr") %>%
# remove uninformative / nonvariable columns
select(-Len_1, -Len_2, -Species, -Strain) %>%
# group by experiment and [tBOOh] to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = num(scaled / scaled[Secondary == "M"], digits = 3)) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M") %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
tBOOH = factor(tBOOH, levels = c("0.5 mM", "1 mM", "1.5 mM", "2 mM"))
)
dat.s3a
dat.s3a %>%
# leave out the lowest concentration in each group
#filter(!H2O2 %in% c("2 mM", "60 mM")) %>%
ggplot(aes(x = tBOOH, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_dodge(0.7)) +
stat_summary(aes(group = Primary), position = position_dodge(0.7),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ tBOOH, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab("tBOOH") + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
From the plot, it is clear that phosphate starvation does NOT provide ASR for tBOOH below we will focus on the 2 mM dataset, as the basal survival rate is ~5%
tmp <- dat.s3a %>%
filter(tBOOH == "2 mM") %>%
pivot_wider(id_cols = Date, names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/`Mock`)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 0.71, 95% CI by bootstrap = [0.49, 0.88]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = -2.9394, df = 5, p-value = 0.9839
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
-0.02271145 Inf
sample estimates:
mean difference
-0.01347428
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 0, p-value = 1
alternative hypothesis: true location shift is greater than 0
Goal
Experiment
Analysis
tmp <- read_tsv("../input/20230801-menadione-ASR-CFU-ht.tsv",
col_types = cols(), comment = "#") %>%
mutate(
scaled = Count * Dilutions * 1e-2
)
dat.s3b <- tmp %>%
# remove uninformative / nonvariable columns
select(-Len_1, -Len_2, -Species, -Strain) %>%
# group by experiment and [tBOOh] to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = scaled / scaled[Secondary == "M"]) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M") %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Concentration = factor(Concentration,
levels = c("250 uM", "300 uM", "350 uM",
"400 uM", "450 uM")),
Flag = Count < 10
) %>%
rename(Menadione = Concentration)
dat.s3b
dat.s3b %>%
# leave out the lowest concentration in each group
#filter(!H2O2 %in% c("2 mM", "60 mM")) %>%
ggplot(aes(x = Menadione, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary, color = Flag), stroke = 1, size = 2,
position = position_dodge(0.7)) +
stat_summary(aes(group = Primary),
position = position_dodge(0.7),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Menadione, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_color_manual("Count < 10", values = c("gray10", "green3")) +
scale_y_continuous(labels = scales::percent) +
xlab("Menadione") + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(axis.text.x = element_blank(),
strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
the whole concentration series shows consistent trend of ASR. 400 uM would have been a good concentration to focus on, but we don’t have enough replicates at this concentration
We focus on 350 uM set because it shows ASR and have 5 usable replicates.
tmp <- dat.s3b %>%
filter(Menadione == "350 uM", !Flag) %>%
pivot_wider(id_cols = Date, names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/`Mock`) %>%
na.omit()
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 2.96, 95% CI by bootstrap = [1.52, 4.88]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 2.5106, df = 4, p-value = 0.03301
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.02443896 Inf
sample estimates:
mean difference
0.1620079
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 15, p-value = 0.03125
alternative hypothesis: true location shift is greater than 0
Goal
Experiment
Analysis
Note that we will overwrite dat.s3b.
tmp <- read_tsv("../input/20230910-MSB-ASR-CFU-ht.tsv",
col_types = cols(), comment = "#") %>%
mutate(
scaled = Count * Dilutions * 1e-2
)
dat.s3b <- tmp %>%
# remove the phosphate starvation experiments - they were included as positive
# controls, but because there is only one replicate and the secondary stress
# was applied for 3 instead of 2 hours, CFU is extremely low and not conclusive
filter(Len_2 != "3 hr") %>%
# remove uninformative / nonvariable columns
select(-Len_1, -Len_2, -Species, -Strain) %>%
# group by experiment and [tBOOh] to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = num(scaled / scaled[Secondary == "M"], digits = 3)) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M") %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Menadione = factor(Concentration, levels = c("300 mM", "350 mM", "400 mM", "60mM H2O2"))
)
dat.s3b
dat.s3b %>%
# leave out the lowest concentration in each group
#filter(!H2O2 %in% c("2 mM", "60 mM")) %>%
ggplot(aes(x = Menadione, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_dodge(0.7)) +
stat_summary(aes(group = Primary), position = position_dodge(0.7),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Menadione, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab("MSB") + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
at the highest, 400 mM concentration, there seems to be ASR below we will focus on the 400 mM dataset, where the basal survival rate is ~1%
dat.s3ab <- bind_rows(
tBOOH = dat.s3a %>% filter(tBOOH == "2 mM") %>% rename(Concentration = tBOOH),
MSB = dat.s3b %>% filter(Concentration == "400 mM") %>% select(-Menadione),
H2O2 = dat.s3b %>% filter(Concentration == "60mM H2O2") %>% select(-Menadione),
.id = "ROS"
) %>%
mutate(Concentration = gsub("60mM H2O2", "60 mM", Concentration),
ROS = factor(ROS, levels = c("tBOOH", "MSB", "H2O2"),
labels = c("tBOOH", "MSB", "H[2]*O[2]")))
dat.s3ab %>%
ggplot(aes(x = Concentration, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_dodge(0.7)) +
stat_summary(aes(group = Primary), position = position_dodge(0.7),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ ROS, nrow = 1, scales = "free_x", labeller = label_parsed) +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab(NULL) + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
background_grid(major = "none") +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
ggsave("../output/20230912-figs3-other-ros.png", width = 5, height = 3)
We focus on 400 mM set because it shows the most clear ASR effect. Note that a test done for the 350 mM, which leads to a similar 5% basal survival rate as 2 mM tBOOH, showed a moderate ASR (mean = 1.55) effect, with a P-value approaching 0.05 (0.08). Jinye’s spotting assay also showed clear ASR result at 350 mM.
tmp <- dat.s3b %>%
filter(Menadione == "400 mM") %>%
pivot_wider(id_cols = Date, names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/`Mock`)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 2.41, 95% CI by bootstrap = [1.79, 3.00]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 3.0552, df = 5, p-value = 0.01413
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.007146316 Inf
sample estimates:
mean difference
0.02099059
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
In the same experiment, ASR for H2O2 was confirmed:
tmp <- dat.s3b %>%
filter(Menadione == "60mM H2O2") %>%
pivot_wider(id_cols = Date, names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/`Mock`)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 16.40, 95% CI by bootstrap = [7.28, 25.30]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 4.3054, df = 4, p-value = 0.006296
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.06709178 Inf
sample estimates:
mean difference
0.1328976
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 15, p-value = 0.03125
alternative hypothesis: true location shift is greater than 0
Goal
Experiment
Data
| Species | Strain | Genotype | H2O2 |
|---|---|---|---|
| C. glabrata | yH001, yH002 | wildtype | 80, 100 mM |
| C. glabrata | yH271, yH272 | cta1∆ | 2.2, 2.5 mM |
Six replicates, two each from 07/08, 07/11, 07/12 of 2022.
use.f3 <- paste0(c("07/08", "07/11", "07/12"), "/22")
tmp <- raw %>%
filter(Date %in% use.f3) %>%
mutate(
scaled = Count * Dilutions * 1e-2
) %>%
# remove uninformative columns. only one H2O2 conc used for each species
select(-Len_1, -Len_2, -Experimenter)
# Assume the replicates were paired in the order they appear in the table,
# i.e., the first row in the MO, MM, PO, PM groups belong to the same biological
# replicate, we can derive three ASR_scores for each date x species x Len_1
dat.f3 <- tmp %>%
# group by primary to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Strain, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = num(scaled / scaled[Secondary == "M"], digits = 3)) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M")
dat.f3
dat.f3 %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "cta1Δ"),
labels = c("wildtype", "cta1Δ")),
Group = factor(H2O2, levels = c("100 mM", "2.5 mM", "80 mM", "2.2 mM"),
labels = c("High", "High", "Medium", "Medium"))
) %>%
ggplot(aes(x = H2O2, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_dodge(0.9)) +
stat_summary(aes(group = Primary), position = position_dodge(0.9),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Group + Genotype, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
To be consistent with panel C, we will use the 100 mM vs 2.5 mM pair and leave out the 80 mM vs 2.2 mM pair.
dat.f3a <- dat.f3 %>%
mutate( Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "cta1Δ"),
labels = c("wildtype", "cta1Δ")),
Group = factor(H2O2, levels = c("100 mM", "2.5 mM", "80 mM", "2.2 mM"),
labels = c("High", "High", "Medium", "Medium"))) %>%
filter(Group == "High") %>%
select(-Group) %>%
ungroup()
#write_tsv(dat.f3a, file = "../input/20230520-fig-3d-data-hb.tsv")
p <- dat.f3a %>%
mutate(Genotype = fct_recode(Genotype, "WT" = "wildtype")) %>%
ggplot(aes(x = Primary, y = r)) +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitter(0.1)) +
scale_shape_manual(name = "", values = c(1, 16), guide = "none") +
#scale_fill_manual(name = bquote(H[2]*O[2]), values = c("white", "gray80")) +
p.survival[-1] +
facet_wrap(~Genotype + H2O2, nrow = 1) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank()
)
p
#ggsave("../output/20230301-fig3d-asr-cta1.png", width = 3.3, height = 3)
p <- ggplot(dat.f3a, aes(x = H2O2, y = r)) +
geom_point(aes(shape = Primary), stroke = 0.9, size = 2.5,
position = position_jitterdodge(jitter.width = 0.3, dodge.width = 0.9)) +
stat_summary(aes(group = Primary), position = position_dodge(0.9),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Genotype, nrow = 1, scales = "free_x") +
scale_shape_manual(name = "Primary stress", values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("% survival") +
theme_cowplot(line_size = 1.2) +
panel_border(color = "black", size = 1.5) +
theme(strip.text = element_text(size = rel(1)),
strip.background = element_blank(),
#strip.placement = "inside",
axis.line = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top",
legend.justification = "center",
legend.margin = margin(b = -10),
legend.text = element_text(size = rel(0.9)),
legend.title = element_text(size = rel(0.9)))
p
ggsave("../output/20230301-fig3d-asr-cta1.png", width = 3.5, height = 3.5)
Statistical tests
Basal survival rate
Between wt and cta1∆, unpaired, rank-sum test.
tmp <- dat.f3a %>%
filter(Primary == "Mock") %>%
select(Date, Genotype, r) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
wilcox.test(r ~ Genotype, paired = FALSE, data = tmp)
Wilcoxon rank sum exact test
data: r by Genotype
W = 12, p-value = 0.3939
alternative hypothesis: true location shift is not equal to 0
basal survival rates not significantly different between genotypes at the chosen [H2O2]
Survival rate with the primary stress
Between wt and cta1∆, unpaired, rank-sum test.
tmp <- dat.f3a %>%
filter(Primary == "-Pi") %>%
select(Date, Genotype, r) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
wilcox.test(r ~ Genotype, paired = FALSE, data = tmp)
Wilcoxon rank sum exact test
data: r by Genotype
W = 36, p-value = 0.002165
alternative hypothesis: true location shift is not equal to 0
survival rates with primary stress significantly different between wt and cta1∆
Primary stress enhanced in wildtype
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f3a %>%
filter(Genotype == "wildtype") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = FALSE, alternative = "g"))
Primary stress effect in cta1Δ
Paired, signed-rank test
tmp <- dat.f3a %>%
filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = FALSE, alternative = "g"))
There is a statistically significant ASR effect in cta1∆, but the effect is very mild
Comparing the ASR effect in cta1Δ vs wild type
Unpaired, rank-sum test
tmp <- dat.f3a %>%
#filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Date, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
t.test(as.numeric(ASR) ~ Genotype, paired = FALSE, data = tmp)
wilcox.test(as.numeric(ASR) ~ Genotype, paired = FALSE, data = tmp)
The difference in ASR-score between cta1∆ and wild type is significant (P = 0.002)
Corresponding results for 80 mM vs 2.2 mM
dat.f3b <- dat.f3 %>%
mutate( Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "cta1Δ"),
labels = c("wildtype", "cta1Δ")),
Group = factor(H2O2, levels = c("100 mM", "2.5 mM", "80 mM", "2.2 mM"),
labels = c("High", "High", "Medium", "Medium"))) %>%
filter(Group == "Medium") %>%
ungroup()
p <- ggplot(dat.f3b, aes(x = H2O2, y = r)) +
geom_point(aes(shape = Primary), stroke = 0.9, size = 2.5,
position = position_jitterdodge(jitter.width = 0.3, dodge.width = 0.9)) +
stat_summary(aes(group = Primary), position = position_dodge(0.9),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Genotype, nrow = 1, scales = "free_x") +
scale_shape_manual(name = "Primary stress", values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("% survival") +
theme_cowplot(line_size = 1.2) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1)),
strip.background = element_blank(),
#strip.placement = "inside",
axis.line = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top",
legend.justification = "center",
legend.margin = margin(b = -10),
legend.text = element_text(size = rel(0.9)),
legend.title = element_text(size = rel(0.9)))
p
#ggsave("../output/20230301-f3b-asr-cta1.png", width = 3.5, height = 3.5)
Statistical tests
Basal survival rate Between genotypes, unpaired, rank-sum test.
tmp <- dat.f3b %>%
filter(Primary == "Mock") %>%
select(Date, Genotype, r) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
wilcox.test(r ~ Genotype, paired = FALSE, data = tmp)
Wilcoxon rank sum exact test
data: r by Genotype
W = 7, p-value = 0.09307
alternative hypothesis: true location shift is not equal to 0
the basal survival rate is slightly higher in cta1∆. the difference is not statistically significant at a conventional 0.05 level.
Primary stress enhanced in wildtype
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f3b %>%
filter(Genotype == "wildtype") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 4.46, 95% CI by bootstrap = [3.76, 5.16]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 8.9447, df = 5, p-value = 0.0001455
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.09212352 Inf
sample estimates:
mean difference
0.1189116
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
Primary stress effect in cta1Δ
Paired, signed-rank test
tmp <- dat.f3b %>%
filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 0.93, 95% CI by bootstrap = [0.57, 1.40]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = -0.84267, df = 5, p-value = 0.7811
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
-0.02818646 Inf
sample estimates:
mean difference
-0.00831149
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 7, p-value = 0.7812
alternative hypothesis: true location shift is greater than 0
no significant effect of ASR in cta1∆ at this [H2O2] concentration.
Comparing the ASR effect in cta1Δ vs wild type
Unpaired, rank-sum test
tmp <- dat.f3b %>%
#filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Date, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
t.test(as.numeric(ASR) ~ Genotype, paired = FALSE, data = tmp)
Welch Two Sample t-test
data: as.numeric(ASR) by Genotype
t = 7.5856, df = 8.1316, p-value = 5.858e-05
alternative hypothesis: true difference in means between group wildtype and group cta1Δ is not equal to 0
95 percent confidence interval:
2.460004 4.600269
sample estimates:
mean in group wildtype mean in group cta1Δ
4.4588479 0.9287116
wilcox.test(as.numeric(ASR) ~ Genotype, paired = FALSE, data = tmp)
Wilcoxon rank sum exact test
data: as.numeric(ASR) by Genotype
W = 36, p-value = 0.002165
alternative hypothesis: true location shift is not equal to 0
The difference in ASR-score between the cta1∆ and wild type is significant (P = 0.002)
Goal
Experiment
Data
| Species | Strain | Genotype | H2O2 |
|---|---|---|---|
| C. glabrata | yH181 | wildtype | 60, 80, 100 mM |
| C. glabrata | yH271 | cta1∆ | 2., 2.5, 3. mM |
| C. glabrata | yH285 | cta1::CTA1. | 60, 80, 100 mM |
Six replicates, between 7/31/23 - 8/4/23
Analysis
tmp <- read_tsv("../input/20230806-fig-3sup-cta1-comp-data-ht.tsv",
col_types = cols(), comment = "#") %>%
mutate(
scaled = Count * Dilutions * 1e-2
)
dat.f3s <- tmp %>%
# remove uninformative columns. only one H2O2 conc used for each species
select(-Len_1, -Len_2, -Species) %>%
# group by experiment and H2O2 to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Index, Strain, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = num(scaled / scaled[Secondary == "M"], digits = 3)) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M")
dat.f3s
dat.f3s %>%
# leave out the lowest concentration in each group
filter(!H2O2 %in% c("2 mM", "60 mM")) %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "Cgcta1::CgCTA1", "cta1::URA3"),
labels = c("wildtype", "cta1::CTA1", "cta1Δ"))
) %>%
ggplot(aes(x = H2O2, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_dodge(0.7)) +
stat_summary(aes(group = Primary), position = position_dodge(0.7),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Genotype, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab("Primary stress (45 min)") + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank(),
legend.position = c(0.9, 0.8),
legend.background = element_blank())
By trial and error, I found 80 mM for the wildtype, 100 mM for the complement strain and 3 mM for the deletion had relatively similar basal survival rates.
dat.f3s1 <- dat.f3s %>%
mutate(Group = paste(Genotype, H2O2, sep = "_")) %>%
#filter(H2O2 %in% c("100 mM", "3 mM")) %>%
filter(Group %in% c("wt_80 mM", "Cgcta1::CgCTA1_100 mM", "cta1::URA3_3 mM")) %>%
select(-Group) %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "Cgcta1::CgCTA1", "cta1::URA3"),
labels = c("wildtype", "cta1::CTA1", "cta1Δ")))
#write_tsv(dat.f3a, file = "../input/20230520-fig-3d-data-hb.tsv")
p <- dat.f3s1 %>%
mutate(Genotype = fct_recode(Genotype, "WT" = "wildtype")) %>%
ggplot(aes(x = Primary, y = r)) +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitter(0.1)) +
scale_shape_manual(name = "", values = c(1, 16), guide = "none") +
#scale_fill_manual(name = bquote(H[2]*O[2]), values = c("white", "gray80")) +
p.survival[-1] +
facet_wrap(~Genotype + H2O2, nrow = 1) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank()
)
p
#ggsave("../output/20230806-fig3sup-asr-cta1-comp-1.png", width = 5, height = 3)
#p <-
dat.f3s1 %>%
mutate(
Genotype = fct_recode(Genotype, "WT" = "wildtype"),
Group = paste(Genotype, H2O2, sep = "\n"),
r = as.numeric(r)) %>%
pivot_wider(id_cols = c(Index, Group), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi` / Mock, Group = fct_relevel(Group, "WT\n80 mM")) %>%
ggplot(aes(x = Group, y = ASR)) +
geom_point(stroke = 1, size = 2,
position = position_jitter(0.1)) +
stat_summary(fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
scale_y_continuous(labels = scales::percent) +
ylab("% survival") +
theme_cowplot(line_size = 0.7, font_size = 14) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(face = 2, size = rel(1.2)),
strip.background = element_blank()
)
Basal survival rate
Because we have three groups here, my strategy is to first perform an ANOVA-like, but rank-based, Krusal-Wallis test, where the null hypothesis is that all three groups’ distribution has the same “location” (=mean). I also performed a pairwise Wilcoxson rank sum test, although that is not necessary if the first test is deemed insignificant.
tmp <- dat.f3s1 %>%
filter(Primary == "Mock") %>%
select(Index, Genotype, r) %>%
mutate(r = as.double(r)) %>%
arrange(Genotype)
Adding missing grouping variables: `Strain`, `Primary`
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(r ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: r by Genotype
Kruskal-Wallis chi-squared = 1.9268, df = 2, p-value = 0.3816
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype)
Pairwise comparisons using Wilcoxon rank sum exact test
data: tmp$r and tmp$Genotype
wildtype cta1::CTA1
cta1::CTA1 0.4 -
cta1Δ 1.0 1.0
P value adjustment method: holm
basal survival rates not significantly different between genotypes at the chosen [H2O2], as shown by both the (ANOVA-equivalent, rank-based) Kruskal-Wallis test and the (t-test equivalent) pairwise Wilcoxson rank-sum test (unpaired). P-values from the latter were adjusted for multiple comparison using Holm’s method (uniformly more powerful than Bonferroni)
Survival rate with the primary stress
Same strategy as above
tmp <- dat.f3s1 %>%
filter(Primary == "-Pi") %>%
select(Index, Genotype, r) %>%
arrange(Genotype)
Adding missing grouping variables: `Strain`, `Primary`
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(as.double(r) ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: as.double(r) by Genotype
Kruskal-Wallis chi-squared = 9.6092, df = 2, p-value = 0.008192
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype, paired = FALSE)
Pairwise comparisons using Wilcoxon rank sum exact test
data: tmp$r and tmp$Genotype
wildtype cta1::CTA1
cta1::CTA1 0.589 -
cta1Δ 0.017 0.013
P value adjustment method: holm
survival rates with primary stress significantly different between cta1∆ vs either the wildtype or the complement, while no difference was detected between the latter two.
Primary stress enhanced in wildtype
Here, I first calculate the ASR score and its summary statistics. To determine if the effect is significant, I perform three types of tests: - T-test, most powerful but requiring the data to be normally distributed, which ours doesn’t. - Wilcoxson signed-rank test, paired - Wilcoxson rank-sum test (=Mann-Whitney’s U test), unpaired
In the paper, we report the P-values of the Wilcoxson signed-rank test
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f3s1 %>%
filter(Genotype == "wildtype") %>%
pivot_wider(id_cols = c(Index, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 23.90, 95% CI by bootstrap = [15.10, 36.20]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 3.574, df = 5, p-value = 0.007988
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.08322759 Inf
sample estimates:
mean difference
0.190803
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
Primary stress enhanced in CTA1 complement
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f3s1 %>%
filter(Genotype == "cta1::CTA1") %>%
pivot_wider(id_cols = c(Index, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 15.39, 95% CI by bootstrap = [9.51, 21.90]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 7.3528, df = 5, p-value = 0.0003652
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.1379887 Inf
sample estimates:
mean difference
0.1900807
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
Primary stress effect in cta1Δ
Paired, signed-rank test
tmp <- dat.f3s1 %>%
filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Index, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 5.14, 95% CI by bootstrap = [2.00, 10.47]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 3.0595, df = 4, p-value = 0.01884
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.006144732 Inf
sample estimates:
mean difference
0.02026672
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 15, p-value = 0.03125
alternative hypothesis: true location shift is greater than 0
There is a statistically significant ASR effect in cta1∆, but the effect is very mild
Comparing the ASR effect in cta1Δ vs wild type
Unpaired, rank-sum test
tmp <- dat.f3s1 %>%
#filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Index, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
kruskal.test(as.numeric(ASR) ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: as.numeric(ASR) by Genotype
Kruskal-Wallis chi-squared = 7.4588, df = 2, p-value = 0.02401
pairwise.wilcox.test(x = as.numeric(tmp$ASR), g = tmp$Genotype, paired = FALSE, p.adjust.method = "none")
Pairwise comparisons using Wilcoxon rank sum exact test
data: as.numeric(tmp$ASR) and tmp$Genotype
wildtype cta1::CTA1
cta1::CTA1 0.240 -
cta1Δ 0.017 0.052
P value adjustment method: none
The tests of interests among the pairwise comparisons are between the wild type and the other two strains Applying Bonferroni correction to the two tests, we conclude that cta1∆ is significantly different from the wild type in its ASR effect (P = 0.034), while the difference between cta1::CTA1 and wild type is not (P = 0.48).
A second set of concentrations
dat.f3s2 <- dat.f3s %>%
mutate(Group = paste(Genotype, H2O2, sep = "_")) %>%
#filter(H2O2 %in% c("100 mM", "3 mM")) %>%
filter(Group %in% c("wt_60 mM", "Cgcta1::CgCTA1_80 mM", "cta1::URA3_2.5 mM")) %>%
select(-Group) %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "Cgcta1::CgCTA1", "cta1::URA3"),
labels = c("wildtype", "cta1::CTA1", "cta1Δ")))
#write_tsv(dat.f3a, file = "../input/20230520-fig-3d-data-hb.tsv")
p <- dat.f3s2 %>%
mutate(Genotype = fct_recode(Genotype, "WT" = "wildtype")) %>%
ggplot(aes(x = Primary, y = r)) +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitter(0.1)) +
scale_shape_manual(name = "", values = c(1, 16), guide = "none") +
#scale_fill_manual(name = bquote(H[2]*O[2]), values = c("white", "gray80")) +
p.survival[-1] +
facet_wrap(~Genotype + H2O2, nrow = 1) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank()
)
p
ggsave("../output/20230806-fig3sup-asr-cta1-comp-2.png", width = 5, height = 3)
Statistical tests
Basal survival rate
Between wt and cta1∆, unpaired, rank-sum test.
tmp <- dat.f3s2 %>%
filter(Primary == "Mock") %>%
select(Index, Genotype, r) %>%
mutate(r = as.double(r)) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(r ~ Genotype, data = tmp)
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype)
basal survival rates not significantly different between genotypes at the chosen [H2O2]
Survival rate with the primary stress
Between genotypes, unpaired, rank-sum test.
tmp <- dat.f3s2 %>%
filter(Primary == "-Pi") %>%
select(Index, Genotype, r) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(as.double(r) ~ Genotype, data = tmp)
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype, paired = FALSE)
survival rates with primary stress significantly different between cta1∆ vs either the wildtype or the complement. No significant difference between the latter two.
Primary stress enhanced in wildtype
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f3s2 %>%
filter(Genotype == "wildtype") %>%
pivot_wider(id_cols = c(Index, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Primary stress enhanced in CTA1 complement
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f3s2 %>%
filter(Genotype == "cta1::CTA1") %>%
pivot_wider(id_cols = c(Index, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Primary stress effect in cta1Δ
Paired, signed-rank test
tmp <- dat.f3s2 %>%
filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Index, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
ASR effect not significant in cta1∆.
Comparing the ASR effect in cta1Δ vs wild type
Unpaired, rank-sum test
tmp <- dat.f3s2 %>%
#filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Index, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
kruskal.test(as.numeric(ASR) ~ Genotype, data = tmp)
pairwise.wilcox.test(x = as.numeric(tmp$ASR), g = tmp$Genotype, paired = FALSE, p.adjust.method = "none")
The tests of interests among the pairwise comparisons are between the wild type and the other two strains Applying Bonferroni correction to the two tests, we conclude that cta1∆ is significantly different from the wild type in its ASR effect (P = 0.0044), while the difference between cta1::CTA1 and wild type is not (P = 1).
Goal
Determine if Rim15 is part of the signaling network behind the ASR.
Rationale
Rim15 is the yeast homolog of the Greatwall kinase, regulating multiple downstream processes and is itself regulated by at least three major kinase complexes. This puts it at a crossroad of signal integration. One of the upstream kinases regulating Rim15 is TORC1, which we later found to be connected with the ASR.
Data
| Species | Strain | Genotype | H2O2 |
|---|---|---|---|
| C. glabrata | yH001, yH002 | wildtype | 80, 100 mM |
| C. glabrata | yH609, yH610 | rim15∆ | 60, 80 mM |
Eight replicates (at two [H2O2]), two each from 06/28, 06/29 and 07/13, plus one each from 07/02 and 07/04 of 2022. The data from 7/4 should be excluded because Jinye noted media contamination on that day.
Reformat JY’s data into a tidy format. Note that the following data
file contains the same information as in the raw object
imported at the beginning of this rmarkdown. The file below, however,
contains experimental notes, including 7/4/22 data having media
contamination issues.
tmp <- read_tsv("../input/20230522-rim15-data-jy.tsv", col_types = cols(), comment = "#") %>%
mutate(Date = gsub("(\\d\\d)(\\d\\d)(\\d\\d)", "\\1/\\2/\\3", Date)) %>%
select(-`MO/MM_percent`, -`PO/PM_percent`)
# data sanity check, quick view
sapply(select(tmp, Date, Species, Strain, Genotype, Len_1, Len_2, H2O2), unique)
$Date
[1] "07/04/22" "07/02/22" "06/29/22" "06/28/22" "07/13/22"
$Species
[1] "Cg"
$Strain
[1] "yH001" "yH002" "yH609" "yH610"
$Genotype
[1] "wt" "rim15Δ"
$Len_1
[1] "45 min"
$Len_2
[1] "2 hr"
$H2O2
[1] "0 mM" "80 mM" "100 mM" "60 mM"
dat.f6b <- tmp %>%
pivot_longer(c(Count_MO_MM, Count_PO_PM), names_to = "Primary", values_to = "Count") %>%
mutate(Primary = fct_recode(Primary, "Mock" = "Count_MO_MM", "-Pi" = "Count_PO_PM"),
scaled = Count * Dilutions * 1e-3) %>%
group_by(Date, Strain, Genotype, Primary) %>%
mutate(scaled_M = scaled[H2O2 == "0 mM"],
r = scaled / scaled_M) %>%
ungroup() %>%
select(Date, Species, Strain, Genotype, Primary, H2O2, Dilutions,
Count, scaled, scaled_M, r, Prism_data_point, Note) %>%
arrange(Date, Genotype, Primary)
Save the data for future references
write_tsv(dat.f6b, file = "../input/20230523-fig-6-data-hb.tsv")
Below the same data is extracted from the large dataset. The count information should be the same.
use.f6 <- paste0(c("06/28", "06/29", "07/02", "07/13"), "/22")
tmp <- raw %>%
filter(Date %in% use.f6) %>%
mutate(
scaled = Count * Dilutions * 1e-2
) %>%
# remove uninformative columns. only one H2O2 conc used for each species
select(-Len_1, -Len_2, -Experimenter)
# Assume the replicates were paired in the order they appear in the table,
# i.e., the first row in the MO, MM, PO, PM groups belong to the same biological
# replicate, we can derive three ASR_scores for each date x species x Len_1
dat.f6 <- tmp %>%
# group by primary to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Strain, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = scaled / scaled[Secondary == "M"]) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M")
dat.f6
Annotate the data. For 7/2 and 7/4 data, only use yH001 for the wt, so we end up with the same number of replicates for both strains.
dat.f6p <- dat.f6b %>%
filter(H2O2 != "0 mM",
!(Date == "07/02/22" & Strain == "yH002"), Date != "07/04/22") %>%
mutate(
Genotype = factor(Genotype, levels = c("wt", "rim15Δ")),
group = paste(Genotype, gsub(" ", "", H2O2), sep = "_"),
Secondary = factor(group,
levels = c("wt_100mM", "wt_80mM", "rim15Δ_80mM", "rim15Δ_60mM"),
labels = c("High", "Medium", "High", "Medium"))
)
with(dat.f6p, table(Date, group))
group
Date rim15Δ_60mM rim15Δ_80mM wt_100mM wt_80mM
06/28/22 4 4 4 4
06/29/22 4 4 4 4
07/02/22 2 2 2 2
07/13/22 4 4 4 4
dat.f6p %>%
ggplot(aes(x = H2O2, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.9)) +
stat_summary(aes(group = Primary), position = position_dodge(0.9),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Secondary + Genotype, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
Statistical test for differences in basal survival between genotypes. I first applied the Kruskal-Wallis test, which is an extension of the Mann-Whitney’s U test for multiple groups and is equivalent to ANOVA but applied on ranks. I used this test to determine if there is evidence for differences in the basal survival rates among the four groups (2 genotypes x 2 [H2O2]).
tmp <- dat.f6p %>% filter(Primary == "Mock")
tmp %>% group_by(group) %>%
summarize(
test = Hmisc::smean.cl.normal(r) %>% t() %>% as_tibble()
) %>%
unnest(test) %>%
mutate(across(where(is.double), round, digits = 3))
kruskal.test(r ~ group, data = tmp)
Kruskal-Wallis rank sum test
data: r by group
Kruskal-Wallis chi-squared = 15.726, df = 3, p-value = 0.00129
There are differences among the four groups. Inspecting the statistical summary, we can see that the high and low concentrations used for each genotype seem to match each other well. Therefore, we will next use A Wilcoxon rank-sum test to compare the two genotypes at either the high or the low dose
Statistical test for differences in basal survival between genotypes at 100 / 80 mM.
tmp <- dat.f6p %>%
filter(group %in% c("wt_100mM", "rim15Δ_80mM"), Primary == "Mock")
wilcox.test(r ~ group, data = tmp, paired = FALSE)
Wilcoxon rank sum exact test
data: r by group
W = 19, p-value = 0.535
alternative hypothesis: true location shift is not equal to 0
Statistical test for differences in basal survival between genotypes at 80/60 mM.
tmp <- dat.f6p %>%
filter(group %in% c("wt_80mM", "rim15Δ_60mM"), Primary == "Mock")
wilcox.test(r ~ group, data = tmp, paired = FALSE)
Wilcoxon rank sum exact test
data: r by group
W = 12, p-value = 0.1282
alternative hypothesis: true location shift is not equal to 0
yes, the survival rates are comparable between species at the two pairs of chosen [H2O2] We can conclude that there is no significant difference in survival when comparing 100 mM vs 80 mM for wt and rim15∆. Similarly, there is no significant difference in survival when 80 mM and 60 mM were used to treat the two strains, respectively.
dat.f6p %>% filter(Secondary == "High") %>%
mutate(Genotype = fct_recode(Genotype, `WT` = "wt")) %>%
ggplot(aes(x = Primary, y = r)) +
#geom_line(aes(group = paste0(Date, Strain, Secondary)),
# linetype = 2, linewidth = 0.1) +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitter(0.05)) +
scale_shape_manual(values = c(1, 16), guide = "none") +
p.survival[-1] +
facet_wrap(~Genotype + H2O2, nrow = 1) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank(),
legend.position = "top",
legend.justification = "center",
legend.text = element_text(size = rel(0.9)),
legend.title = element_text(size = rel(0.9)))
#ggsave("../output/20230522-rim15-vs-wt-ASR.png", width = 3.6, height = 3.4)
Basal survival rate
Repeating the test above:
Statistical test for differences in basal survival between genotypes at 100 / 80 mM.
tmp <- dat.f6p %>%
filter(Secondary == "High", Primary == "Mock")
wilcox.test(r ~ Genotype, data = tmp, paired = FALSE)
Wilcoxon rank sum exact test
data: r by Genotype
W = 30, p-value = 0.535
alternative hypothesis: true location shift is not equal to 0
ASR in wildtype
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f6p %>% filter(Secondary == "High") %>%
filter(Genotype == "wt") %>%
pivot_wider(id_cols = c(Date, Strain, H2O2), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 6.29, 95% CI by bootstrap = [4.59, 8.59]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 11.953, df = 6, p-value = 1.039e-05
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.1108889 Inf
sample estimates:
mean difference
0.1324153
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 28, p-value = 0.007813
alternative hypothesis: true location shift is greater than 0
ASR in rim15Δ
Paired, signed-rank test
tmp <- dat.f6p %>% filter(Secondary == "High") %>%
filter(Genotype == "rim15Δ") %>%
pivot_wider(id_cols = c(Date, Strain, H2O2), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 3.45, 95% CI by bootstrap = [2.57, 4.33]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 5.6886, df = 6, p-value = 0.0006367
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.03364967 Inf
sample estimates:
mean difference
0.0511075
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 28, p-value = 0.007813
alternative hypothesis: true location shift is greater than 0
ASR effect size between wt and rim15∆
tmp <- dat.f6p %>%
filter(Secondary == "High") %>%
pivot_wider(id_cols = c(Date, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
t.test(as.numeric(ASR) ~ Genotype, paired = FALSE, data = tmp)
Welch Two Sample t-test
data: as.numeric(ASR) by Genotype
t = 2.3448, df = 8.4269, p-value = 0.04551
alternative hypothesis: true difference in means between group wt and group rim15Δ is not equal to 0
95 percent confidence interval:
0.07146653 5.61211996
sample estimates:
mean in group wt mean in group rim15Δ
6.290364 3.448570
wilcox.test(as.numeric(ASR) ~ Genotype, paired = FALSE, data = tmp)
Wilcoxon rank sum exact test
data: as.numeric(ASR) by Genotype
W = 41, p-value = 0.03788
alternative hypothesis: true location shift is not equal to 0
The difference in the ASR-score between rim15∆ and wild type is significant at a 0.05 level (P = 0.04).
Goal
Experiment
Data
| Species | Strain | Genotype | H2O2 |
|---|---|---|---|
| C. glabrata | yH001,002 | wildtype | 60, 80, 100 mM |
| C. glabrata | yH609,610 | rim51∆ | 60, 80, 100 mM |
| C. glabrata | yH731,732 | rim15::RIM15 | 60, 80, 100 mM |
Experiments between 8/8/23 - 8/10/23
Analysis
tmp <- read_tsv("../input/20230808-rim15-complement-data-ht.tsv",
col_types = cols(), comment = "#") %>%
mutate(
scaled = Count * Dilutions * 1e-2
)
dat.f6s <- tmp %>%
# remove uninformative columns. only one H2O2 conc used for each species
select(-Len_1, -Len_2, -Species) %>%
# group by experiment and H2O2 to calculate r (MO/MM) or r' (PO/PM)
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
group_by(Date, Strain, Primary) %>%
# calculate % survival
mutate(scaled_M = scaled[Secondary == "M"],
r = num(scaled / scaled[Secondary == "M"], digits = 3)) %>%
# remove the secondary mock as the information are all used
filter(Secondary != "M") %>%
mutate(
Primary = factor(Primary, levels = c("M", "P"),
labels = c("Mock", "-Pi")),
Genotype = factor(Genotype, levels = c("wt", "rim15::RIM15", "rim15::His3"),
labels = c("wildtype", "rim15::RIM15", "rim15Δ")),
H2O2 = factor(H2O2, levels = c("60 mM", "80 mM", "100 mM"))
)
dat.f6s
dat.f6s %>%
# leave out the lowest concentration in each group
#filter(!H2O2 %in% c("2 mM", "60 mM")) %>%
ggplot(aes(x = H2O2, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_dodge(0.7)) +
stat_summary(aes(group = Primary), position = position_dodge(0.7),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Genotype, nrow = 2, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab("Primary stress (45 min)") + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank(),
legend.position = c(0.6, 0.2),
legend.background = element_blank())
For this set, the same concentrations of H2O2 results in very similar basal survival rates. Here, we will test 60 mM for all three.
dat.f6s1 <- dat.f6s %>%
mutate(Group = paste(Genotype, H2O2, sep = "_")) %>%
filter(Group %in% c("wildtype_100 mM", "rim15Δ_80 mM", "rim15::RIM15_100 mM"))
#write_tsv(dat.f3a, file = "../input/20230520-fig-3d-data-hb.tsv")
p <- dat.f6s1 %>%
mutate(Genotype = fct_recode(Genotype, "WT" = "wildtype")) %>%
ggplot(aes(x = Primary, y = r)) +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitter(0.1)) +
scale_shape_manual(name = "", values = c(1, 16), guide = "none") +
#scale_fill_manual(name = bquote(H[2]*O[2]), values = c("white", "gray80")) +
p.survival[-1] +
facet_wrap(~Genotype + H2O2, nrow = 1) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank()
)
p
#ggsave("../output/20230818-fig6sup-asr-rim15-comp-1.png", width = 5, height = 3)
Basal survival rate
Because we have three groups here, my strategy is to first perform an ANOVA-like, but rank-based, Krusal-Wallis test, where the null hypothesis is that all three groups’ distribution has the same “location” (=mean). I also performed a pairwise Wilcoxson rank sum test, although that is not necessary if the first test is deemed insignificant.
tmp <- dat.f6s1 %>%
filter(Primary == "Mock") %>%
select(Date, Genotype, r) %>%
mutate(r = as.double(r)) %>%
arrange(Genotype)
Adding missing grouping variables: `Strain`, `Primary`
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(r ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: r by Genotype
Kruskal-Wallis chi-squared = 2.6082, df = 2, p-value = 0.2714
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype)
Pairwise comparisons using Wilcoxon rank sum exact test
data: tmp$r and tmp$Genotype
wildtype rim15::RIM15
rim15::RIM15 0.54 -
rim15Δ 0.82 0.54
P value adjustment method: holm
basal survival rates not significantly different between genotypes at the chosen [H2O2], as shown by both the (ANOVA-equivalent, rank-based) Kruskal-Wallis test and the (t-test equivalent) pairwise Wilcoxson rank-sum test (unpaired). P-values from the latter were adjusted for multiple comparison using Holm’s method (uniformly more powerful than Bonferroni)
Survival rate with the primary stress
Same strategy as above
tmp <- dat.f6s1 %>%
filter(Primary == "-Pi") %>%
select(Date, Genotype, r) %>%
arrange(Genotype)
Adding missing grouping variables: `Strain`, `Primary`
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(as.double(r) ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: as.double(r) by Genotype
Kruskal-Wallis chi-squared = 5.1345, df = 2, p-value =
0.07675
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype, paired = FALSE)
Pairwise comparisons using Wilcoxon rank sum exact test
data: tmp$r and tmp$Genotype
wildtype rim15::RIM15
rim15::RIM15 0.48 -
rim15Δ 0.12 0.26
P value adjustment method: holm
survival rates with primary stress are lower in rim15∆ than either the wildtype or the complement, but the difference is not significant
Primary stress enhanced in wildtype
Here, I first calculate the ASR score and its summary statistics. To determine if the effect is significant, I perform three types of tests: - T-test, most powerful but requiring the data to be normally distributed, which ours doesn’t. - Wilcoxson signed-rank test, paired - Wilcoxson rank-sum test (=Mann-Whitney’s U test), unpaired
In the paper, we report the P-values of the Wilcoxson signed-rank test
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f6s1 %>%
filter(Genotype == "wildtype") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 9.51, 95% CI by bootstrap = [5.31, 16.87]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 6.4489, df = 5, p-value = 0.0006668
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.1402953 Inf
sample estimates:
mean difference
0.2040558
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
Primary stress enhanced in RIM15 complement
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f6s1 %>%
filter(Genotype == "rim15::RIM15") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 11.56, 95% CI by bootstrap = [8.86, 15.21]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 6.1387, df = 5, p-value = 0.0008331
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.1232347 Inf
sample estimates:
mean difference
0.1834538
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
Primary stress effect in rim15Δ
Paired, signed-rank test
tmp <- dat.f6s1 %>%
filter(Genotype == "rim15Δ") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
[1] "ASR_score mean = 5.32, 95% CI by bootstrap = [3.55, 7.71]"
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
Paired t-test
data: as.numeric(`-Pi`) and as.numeric(Mock)
t = 9.8572, df = 5, p-value = 9.157e-05
alternative hypothesis: true mean difference is greater than 0
95 percent confidence interval:
0.08289006 Inf
sample estimates:
mean difference
0.1041887
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Wilcoxon signed rank exact test
data: -Pi and Mock
V = 21, p-value = 0.01563
alternative hypothesis: true location shift is greater than 0
At this set of concentrations, all three strains showed significant ASR effects.
Comparing the ASR effect in rim15Δ,rim15::RIM15 and wild type
Unpaired, rank-sum test
tmp <- dat.f6s1 %>%
#filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Date, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
kruskal.test(as.numeric(ASR) ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: as.numeric(ASR) by Genotype
Kruskal-Wallis chi-squared = 6.538, df = 2, p-value =
0.03804
pairwise.wilcox.test(x = as.numeric(tmp$ASR), g = tmp$Genotype, paired = FALSE, p.adjust.method = "holm")
Pairwise comparisons using Wilcoxon rank sum exact test
data: as.numeric(tmp$ASR) and tmp$Genotype
wildtype rim15::RIM15
rim15::RIM15 0.264 -
rim15Δ 0.310 0.045
P value adjustment method: holm
The tests of interests among the pairwise comparisons are between the wild type and the other two strains Applying Bonferroni correction to the two tests, we conclude that rim15∆ is significantly different from the wild type in its ASR effect (P = 0.034), while the difference between cta1::CTA1 and wild type is not (P = 0.48).
A second set of concentrations
dat.f6s2 <- dat.f6s %>%
mutate(Group = paste(Genotype, H2O2, sep = "_")) %>%
filter(Group %in% c("wildtype_80 mM", "rim15Δ_60 mM", "rim15::RIM15_80 mM"))
#write_tsv(dat.f3a, file = "../input/20230520-fig-3d-data-hb.tsv")
p <- dat.f6s2 %>%
mutate(Genotype = fct_recode(Genotype, "WT" = "wildtype")) %>%
ggplot(aes(x = Primary, y = r)) +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitter(0.1)) +
scale_shape_manual(name = "", values = c(1, 16), guide = "none") +
#scale_fill_manual(name = bquote(H[2]*O[2]), values = c("white", "gray80")) +
p.survival[-1] +
facet_wrap(~Genotype + H2O2, nrow = 1) +
panel_border(color = "black", size = 1.5) +
theme(axis.line = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank()
)
p
ggsave("../output/20230818-fig6sup-asr-rim15-comp-2.png", width = 5, height = 3)
Statistical tests
Basal survival rate
Between wt and cta1∆, unpaired, rank-sum test.
tmp <- dat.f6s2 %>%
filter(Primary == "Mock") %>%
select(Date, Genotype, r) %>%
mutate(r = as.double(r)) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(r ~ Genotype, data = tmp)
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype)
basal survival rates not significantly different between genotypes at the chosen [H2O2]
Survival rate with the primary stress
Between genotypes, unpaired, rank-sum test.
tmp <- dat.f6s2 %>%
filter(Primary == "-Pi") %>%
select(Date, Genotype, r) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>% summarize(mean = mean(r), sd = num(sd(r), digits = 3))
kruskal.test(as.double(r) ~ Genotype, data = tmp)
pairwise.wilcox.test(x = tmp$r, g = tmp$Genotype, paired = FALSE)
survival rates with primary stress is lower in rim15∆ than in the wild type or the complement strain. The difference is approaching statistical significance at a 0.05 level by Kruskal-Wallis rank sum test.
Primary stress enhanced in wildtype
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f6s2 %>%
filter(Genotype == "wildtype") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Primary stress enhanced in RIM15 complement
Comparison between r and r’, paired, signed-rank test.
tmp <- dat.f6s2 %>%
filter(Genotype == "rim15::RIM15") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
Primary stress effect in rim15Δ
Paired, signed-rank test
tmp <- dat.f6s2 %>%
filter(Genotype == "rim15Δ") %>%
pivot_wider(id_cols = c(Date, Strain), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock)
tmp
x <- Hmisc::smean.cl.boot(tmp$ASR)
sprintf("ASR_score mean = %.2f, 95%% CI by bootstrap = [%.2f, %.2f]", x[1], x[2], x[3])
with(tmp, t.test(as.numeric(`-Pi`), as.numeric(Mock), paired = TRUE, alternative = "g"))
with(tmp, wilcox.test(`-Pi`, Mock, paired = TRUE, alternative = "g"))
ASR effect not significant in cta1∆.
Comparing the ASR effect in rim15Δ,rim15::RIM15 and wild type
Unpaired, rank-sum test
tmp <- dat.f6s2 %>%
#filter(Genotype == "cta1Δ") %>%
pivot_wider(id_cols = c(Date, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
kruskal.test(as.numeric(ASR) ~ Genotype, data = tmp)
pairwise.wilcox.test(x = as.numeric(tmp$ASR), g = tmp$Genotype, paired = FALSE, p.adjust.method = "none")
Goal
Test the hypothesis that direct inhibition of TORC1 can provide ASR in C. glabrata. We will also test S. cerevisiae to determine if the negative regulation by TORC1 on stress response genes behind the ASR is conserved.
Rationale
If TORC1 inhibition by phosphate limitation directly contributes to the ASR, we should be able to mimic the effect by artificially inhibiting TORC1 with the drug rapamycin. The prediction is that rapamycin treatment should induce Cta1-GFP and provide ASR.
Data
| Species | Rapamycin (ng/mL) | H2O2 | Description |
|---|---|---|---|
| C. glabrata | 50, 62.5, 125, 150 | 60 mM | full ASR experiment |
| S. cerevisiae | 50, 62.5, 125, 150 | 6 mM | full ASR experiment |
| Date | Species | Rapa (ng/mL) | H2O2 | Strain |
|---|---|---|---|---|
| 01/12/23 | Cg | 62.5, 125 | 60 mM | yH181 |
| 01/18/23 | Cg | 62.5, 125 | 60 mM | yH181 |
| 01/21/23 | Sc | 62.5, 125 | 4, 6 mM | yH154 |
| 01/21/23 | Cg | 62.5, 125 | 40, 60 mM | yH181 |
| 01/25/23 | Sc | 62.5, 125 | 4, 6 mM | yH154 |
| 01/25/23 | Cg | 62.5, 125 | 40, 60 mM | yH181 |
| 01/26/23 | Sc | 62.5, 125 | 4, 6 mM | yH154 |
| 01/26/23 | Cg | 62.5, 125 | 40, 60 mM | yH181 |
| 01/31/23 | Sc | 62.5, 125 | 4, 6 mM | yH154 |
| 01/31/23 | Cg | 62.5, 125 | 40, 60 mM | yH181 |
| 02/02/23 | Cg | 62.5, 125 | 40, 60 mM | yH181 |
Note
Jinye used lower H2O2 concentration compared with preivous ASR experiments (100 mM for Cg and 10 mM for Sc) because the primary stresses tested here, i.e., rapamycin and nitrogen starvation, reduced survival while phosphate starvation used in previous ASR experiments didn’t. In order to maintain a similar CFU at the end, a lower H2O2 concentration was applied.
tmp <- read_csv("../input/20230205-Sc-Cg-rapamycin-nitrogen-ASR-raw.csv", col_types = cols(), comment = "#") %>%
mutate(Date = gsub("d(\\d\\d)(\\d\\d)(\\d\\d)", "\\1/\\2/\\3", Date))
dat.f7a <- tmp %>%
rename(Primary = `1st_Stress`) %>%
group_by(Date, Strain, Primary) %>%
mutate(
scaled = Dilutions * Count * 1e-2,
r = scaled / scaled[H2O2 == "Mock"]
) %>%
# confirmed that my calculation (r) equals JY's MO/MM and PO/PM
# preserve her ASR_Score calculation for now
select(-`MO/MM`, -`PO/PM`, -ASR_Score, ASR_Score)
# data sanity check, quick view
sapply(select(dat.f7a, Species, Strain, Genotype), unique)
Adding missing grouping variables: `Date`, `Primary`
$Date
[1] "02/02/23" "01/31/23" "01/26/23" "01/25/23" "01/21/23"
[6] "01/18/23" "01/12/23"
$Primary
[1] "Mock" "62.5" "125" "0Ni"
$Species
[1] "Cg" "Sc"
$Strain
[1] "yH181" "yH154"
$Genotype
[1] "wt"
Plotting
Jinye mentioned that unlike phosphate starvation, rapamycin and nitrogen starvation reduce survival by themselves. To see this myself, I’m plotting the CFU after just the primary
tmp <- dat.f7a %>%
filter(H2O2 == "Mock") %>%
group_by(Date, Species) %>%
mutate(
# arbitrarily decide any CFU counts < 3 are not included due to high CV
Count = ifelse(Count < 3, NA, Count),
scaled = Count * Dilutions * 1e-2,
r = scaled / scaled[Primary == "Mock"]
) %>%
ungroup() %>%
filter(Primary != "Mock") %>%
select(Date, Species, Strain, Primary, Count, r)
dat.prim <- raw %>%
separate(Group, into = c("Primary", "Secondary"), sep = 1) %>%
# use the S2 dataset to examine the effect of phosphate starvation on survival
filter(Date %in% use.s1b, Secondary == "M") %>%
mutate(
# arbitrarily decide any CFU counts < 3 are not included due to high CV
Count = ifelse(Count < 3, NA, Count),
scaled = Count * Dilutions * 1e-2
) %>%
group_by(Date, Strain) %>%
# calculate r and r'
mutate(r = scaled / scaled[Primary == "M"]) %>%
ungroup() %>% filter(Primary != "M") %>%
select(Date, Species, Strain, Primary, Count, r) %>%
bind_rows(tmp) %>%
mutate(Primary = factor(Primary,
levels = c("P", "62.5", "125", "0Ni"),
labels = c("-Pi", "Rapa\n62.5",
"Rapa\n125", "-Nitrogen")))
ggplot(dat.prim, aes(x = Primary, y = r)) + p.asr +
scale_y_continuous(breaks = seq(0.2, 1.2, by = 0.2), labels = scales::percent) +
xlab("Primary stress (45 min)") + ylab("Survival %")
#ggsave("../output/20230211-primary-stress-effect-on-survival.png")
Statistical tests
Use the nest-map-unnest workflow
dat.prim %>%
select(Species, Primary, r) %>%
nest(data = r) %>%
mutate(
test = map(data, ~ t.test(.x, mu = 1, alternative = "two")),
tidied = map(test, tidy)
) %>%
unnest(tidied) %>%
select(Species, Primary, mean_r = estimate, p.value, conf.low, conf.high, alternative) %>%
mutate(P.adj = p.adjust(p.value, method = "BH"),
across(where(is.numeric), round, digits = 3)) %>%
arrange(Species, Primary)
Jinye used two sets of H2O2 concentrations in this experiment, i.e., 40/4 mM and 60/6 mM for C. glabrata and S. cerevisiae, respectively.
with(dat.f7a, table(Date, paste(Species, H2O2, sep = ":")))
Date Cg:40mM Cg:60mM Cg:Mock Sc:4mM Sc:6mM Sc:Mock
01/12/23 0 3 3 0 0 0
01/18/23 0 3 3 0 0 0
01/21/23 4 4 4 4 4 4
01/25/23 4 4 4 4 4 4
01/26/23 4 4 4 4 4 4
01/31/23 4 4 4 4 4 4
02/02/23 4 4 4 0 0 0
First, we will test if the survival under these H2O2 concentrations are comparable across species, by analyzing the basal survival rate r
Statistical test for differences in basal survival between species at 60 or 6 mM. A Wilcoxon signed-rank test is used here since the two groups are dependent (=grouped) by the day of the experiment. One replicate was run per day. Using a paired-test ensures that day-to-day variation is accounted for.
tmp <- dat.f7a %>%
filter(H2O2 %in% c("60mM", "6mM"), Group == "MO",
!(Date %in% c("01/12/23", "01/18/23", "02/02/23")))# %>%
tmp %>% group_by(Species) %>% summarize(mean = mean(r))
wilcox.test(r ~ Species, data = tmp, paired = TRUE)
Wilcoxon signed rank exact test
data: r by Species
V = 3, p-value = 0.625
alternative hypothesis: true location shift is not equal to 0
Statistical test for differences in basal survival between species at 40 or 4 mM.
tmp <- dat.f7a %>%
filter(H2O2 %in% c("40mM", "4mM"), Group == "MO", !(Date %in% c("02/02/23")))
tmp %>% group_by(Species) %>% summarize(mean = mean(r))
wilcox.test(r ~ Species, data = tmp, paired = TRUE)
Wilcoxon signed rank exact test
data: r by Species
V = 1, p-value = 0.25
alternative hypothesis: true location shift is not equal to 0
yes, the survival rates are comparable between species at the two pairs of chosen [H2O2]
Statistical test for differences in basal survival between the high and low H2O2 concentrations (60/6 vs 40/4 mM), species combined.
tmp <- filter(dat.f7a, Group == "MO") %>%
mutate(secondary = factor(H2O2, levels = c("60mM", "6mM", "40mM", "4mM"),
labels = c("high", "high", "low", "low")))
tmp %>%
group_by(secondary, Species) %>%
summarize(mean = num(mean(r), digits = 3), .groups = "drop")
wilcox.test(r ~ secondary, data = tmp, paired = FALSE)
Wilcoxon rank sum exact test
data: r by secondary
W = 21, p-value = 0.03103
alternative hypothesis: true location shift is not equal to 0
anova(lm(r ~ Species + secondary, data = tmp))
Analysis of Variance Table
Response: r
Df Sum Sq Mean Sq F value Pr(>F)
Species 1 0.0006071 0.0006071 0.3490 0.56246
secondary 1 0.0078300 0.0078300 4.5011 0.04887 *
Residuals 17 0.0295725 0.0017396
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
We can conclude that there is no significant difference in survival when comparing 40 mM vs 4 mM for C. glabrata vs S. cerevisiae. Similarly, there is no significant difference in survival when 60 mM and 6 mM were used to treat the two species, respectively. However, the survival rate is significantly lower under the higher set of concentrations (60 and 6 mM) compared with the lower ones (40 and 4 mM)
We will focus on 60/6 mM data for plotting and statistical analyses.
# we will not use the days of experiments where only one species was assayed
dat.7ap <- dat.f7a %>%
mutate(
Secondary = factor(H2O2, levels = c("40mM", "4mM", "60mM", "6mM"),
labels = c("40/4mM", "40/4mM", "60/6mM", "60/6mM"))
) %>%
filter(H2O2 != "Mock", Primary %in% c("125", "Mock")) %>%
mutate(Primary = factor(Primary, levels = c("Mock", "125")))
my calculation is the same as JY’s
with(dat.7ap, sum(round(rP/r, 5) != round(ASR_Score, 5)))
dat.7ap %>% filter(Secondary == "60/6mM") %>%
mutate(Primary = recode(Primary, "125" = "125 ng/mL")) %>%
ggplot(aes(x = Primary, y = r)) + p.survival[-1] +
geom_line(aes(group = paste0(Date, Secondary)), linetype = 2, linewidth = 0.3) +
geom_point(shape = 1, stroke = 1, size = 1) +
#labs(y = "% survival") +
theme(axis.title.x = element_blank(), strip.background = element_blank())
#ggsave("../output/20230520-rapamycin-ASR.png", width = 4.5, height = 2.2)
Summary statistics
dat.7as <- dat.7ap %>%
group_by(Date, Species, Strain, H2O2) %>%
mutate(rM = r[Primary == "Mock"], asr = r/rM) %>%
filter(Primary != "Mock") %>%
rename(rP = "r", r = "rM") %>%
relocate(c(r, asr), .after = "rP") %>%
ungroup()
dat.7as %>% select(-Genotype, -Group, -Dilutions, -Count, -scaled) %>%
mutate(across(where(is.double), round, digits = 2))
dat.7as %>%
group_by(Secondary, Species) %>%
summarize(
# smean.cl.boot returns a named vector. t() %>% as_tibble() turns it
# into a one-row tibble, stored as a list column. unnest() then flattens
# the list-column back out into regular columns
test = Hmisc::smean.cl.boot(ASR_Score) %>% t() %>% as_tibble()
) %>%
unnest(test) %>%
mutate(across(where(is.numeric), round, digits = 2))
`summarise()` has grouped output by 'Secondary'. You can override using the `.groups` argument.
Wilcoxon signed-rank test comparing r’ and r (paired data)
dat.7as %>%
select(Species, Secondary, r, rP) %>%
nest(data = c(r, rP)) %>%
mutate(
test = map(data, ~ wilcox.test(.x$rP, .x$r, paired = TRUE, alternative = "g")),
tidied = map(test, tidy)
) %>%
unnest(tidied) %>%
select(Species, Secondary, T = statistic, p.value, alternative) %>%
mutate(p.bonf = p.adjust(p.value, method = "bonf"),
p.holm = p.adjust(p.value, method = "holm"),
p.hoch = p.adjust(p.value, method = "hoch"),
across(starts_with("p."), round, digits = 5))
Paired t-tests comparing r’ and r
dat.7as %>%
select(Species, Secondary, r, rP) %>%
nest(data = c(r, rP)) %>%
mutate(
test = map(data, ~ t.test(.x$rP, .x$r, paired = TRUE, alternative = "g")),
tidied = map(test, tidy)
) %>%
unnest(tidied) %>%
select(Species, Secondary, p.value, alternative) %>%
mutate(P.adj = p.adjust(p.value, method = "BH"),
across(where(is.numeric), round, digits = 3))
In conclusion, we found significant ASR effect at 125 ng/mL rapamycin treatment in C. glabrata using either a Wilcoxon signed-rank test (nonparametric) or a paired t test, both using a 0.05 rejection threhold. The same test approached the 0.05 significance threshold in S. cerevisiae
Goal
Test if Sch9-3E mutant results in reduced ASR.
Rationale
It is known in S. cerevisiae that TORC1-inhibition leads to the derepression of Msn2/4-mediated stress response via the TORC1-Sch9-Rim15 pathway. Specifically, Sch9 is a key proximal effector of TORC1, which phosphorylates the former. Phosphorylated Sch9 then phosphorylates Rim15, causing it to be retained in the cytoplasm and unable to activate Msn2/4 in the nucleus. Jinye created a phosphomimetic mutant of Sch9 (aka Sch9-3E) that is predicted to be constitutively active with respect to TORC1 signaling. Therefore, this mutant is expected to always repress the Rim15-Msn4 downstream effectors even when TORC1 itself is inhibited.
Data
| Species | Genotype | H2O2 |
|---|---|---|
| C. glabrata | sch9::SCH9-wt-Nat | 0, 20, 40, 60, 80, 100 mM |
| C. glabrata | sch9::SCH9-3E-Nat | 0, 20, 40, 60, 80, 100 mM |
ASR experiments were repeated four times for the two strains on four consecutive days, from 05/14-05/17 of 2023.
Reformat JY’s data into a tidy format
tmp <- read_csv("../input/20230515-CgSCH9-3E-data-jy.csv", col_types = cols(), comment = "#") %>%
mutate(Date = gsub("d(\\d\\d)(\\d\\d)(\\d\\d)", "\\1/\\2/\\3", Date),
H2O2 = gsub("mM", " mM", H2O2),
Len_1 = recode(Len_1, `2hr` = "2 hr", `45min` = "45 min"),
Len_2 = recode(Len_2, `2hr` = "2 hr")) %>%
select(-`MO/MM_percent`, -`PO/PM_percent`)
# data sanity check, quick view
sapply(select(tmp, Species, Strain, Genotype, Len_1, Len_2, H2O2), unique)
$Species
[1] "Cg"
$Strain
[1] "yH692" "yH699" "yH693" "yH694" "yH695"
$Genotype
[1] "sch9_Nat_wt" "sch9_3E_Nat"
$Len_1
[1] "45 min"
$Len_2
[1] "2 hr"
$H2O2
[1] "Mock" "20 mM" "40 mM" "60 mM" "80 mM" "100 mM"
dat.f7b <- tmp %>%
pivot_longer(c(Count_MO_MM, Count_PO_PM), names_to = "Primary", values_to = "Count") %>%
mutate(Primary = fct_recode(Primary, "Mock" = "Count_MO_MM", "-Pi" = "Count_PO_PM"),
Genotype = fct_recode(Genotype,
`sch9::Sch9-3E-NAT` = "sch9_3E_Nat",
`sch9::Sch9-WT-NAT` = "sch9_Nat_wt"),
scaled = Count * Dilutions * 1e-3) %>%
group_by(Date, Strain, Genotype, Primary) %>%
mutate(r = num(scaled / scaled[H2O2 == "Mock"], digits = 3)) %>%
select(Date, Species, Strain, Genotype, Primary, H2O2, Dilutions, Count, scaled, r) %>%
arrange(Date, Genotype, Primary)
Write the table to a file for future references
Jinye tested a series of H2O2 concentrations in this experiment, i.e., 20, 40, 60, 80 and 100 mM. The goal here is to visualize the survival rates between the two genotypes and decide which pair to use for ASR comparisons.
dat.f7b %>%
filter(Primary == "Mock", H2O2 != "Mock") %>%
mutate(H2O2 = gsub(" mM", "", H2O2),
H2O2 = factor(H2O2) %>% fct_reorder(as.numeric(H2O2))) %>%
ggplot(aes(x = H2O2, y = r, group = Genotype)) +
geom_bar(aes(fill = Genotype), stat = "summary", fun = "mean",
position = position_dodge(0.9)) +
geom_point(position = position_dodge(0.9), size = 1, color = "gray30") +
scale_fill_brewer(palette = "Set2", direction = -1) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("Survival rate") +
theme_cowplot()
Somewhat to my surprise, the constitutively active Sch9-3E allele resulted in better survival without primary stress
For our purpose, let’s compare 40 mM and 60 mM for the wt to 100 mM for the Sch9-3E mutant.
A Wilcoxon signed-rank test is used here since the two groups are dependent (=grouped) by the day of the experiment. One replicate was run per day. Using a paired-test ensures that day-to-day variation is accounted for.
x1 = filter(dat.f7b, Genotype == "sch9::Sch9-WT-NAT", H2O2 == "40 mM") %>% pull(r) %>% as.double()
x2 = filter(dat.f7b, Genotype == "sch9::Sch9-WT-NAT", H2O2 == "60 mM") %>% pull(r) %>% as.double()
y = filter(dat.f7b, Genotype == "sch9::Sch9-3E-NAT", H2O2 == "100 mM") %>% pull(r) %>% as.double()
cbind("wt 40mM" = x1, "wt 60mM" = x2, "3E 100mM" = y)
wt 40mM wt 60mM 3E 100mM
[1,] 0.26621835 0.202531646 0.24174174
[2,] 0.40914634 0.362195122 0.17598684
[3,] 0.24153298 0.182263815 0.17717087
[4,] 0.38329646 0.342920354 0.17822384
[5,] 0.05391121 0.043868922 0.11525974
[6,] 0.19563197 0.095724907 0.10425532
[7,] 0.02770936 0.004310345 0.08857616
[8,] 0.15099715 0.064814815 0.11287879
cat("\n")
# Wilcoxon signed-rank tests
cat("<wt 40mM> vs <wt 60mM>")
<wt 40mM> vs <wt 60mM>
wilcox.test(x1, x2, paired = TRUE)
Wilcoxon signed rank exact test
data: x1 and x2
V = 36, p-value = 0.007813
alternative hypothesis: true location shift is not equal to 0
cat("<wt 40mM> vs <3E 100mM>")
<wt 40mM> vs <3E 100mM>
wilcox.test(x1, y, paired = TRUE)
Wilcoxon signed rank exact test
data: x1 and y
V = 29, p-value = 0.1484
alternative hypothesis: true location shift is not equal to 0
cat("<wt 60mM> vs <3E 100mM>")
<wt 60mM> vs <3E 100mM>
wilcox.test(x2, y, paired = TRUE)
Wilcoxon signed rank exact test
data: x2 and y
V = 16, p-value = 0.8438
alternative hypothesis: true location shift is not equal to 0
cat("\n")
# paired t-test
cat("<wt 40mM> vs <wt 80mM>")
<wt 40mM> vs <wt 80mM>
t.test(x1, x2, paired = TRUE)
Paired t-test
data: x1 and x2
t = 5.0382, df = 7, p-value = 0.001499
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
0.02851081 0.07894267
sample estimates:
mean difference
0.05372674
cat("<wt 40mM> vs <3E 100mM>")
<wt 40mM> vs <3E 100mM>
t.test(x1, y, paired = TRUE)
Paired t-test
data: x1 and y
t = 1.7377, df = 7, p-value = 0.1258
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-0.02409797 0.15768560
sample estimates:
mean difference
0.06679381
cat("<wt 60mM> vs <3E 100mM>")
<wt 60mM> vs <3E 100mM>
t.test(x2, y, paired = TRUE)
Paired t-test
data: x2 and y
t = 0.35337, df = 7, p-value = 0.7342
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-0.07437387 0.10050802
sample estimates:
mean difference
0.01306708
For the wt strain, 40 mM clearly resulted in better survival than 60 mM, as expected. When comparing between wt and 3E mutant, 40 mM for wt and 100 mM for 3E have mean survival rates that are closer. Paired statistical tests yield P-values greater than 0.1 for both the 40mM and 60mM of wt compared with 100mM for the 3E mutant.
From the plot for the basal survival rate above, there appears to be a lot of day-to-day variation. Here we visualize this variation more clearly
dat.f7b %>%
filter(H2O2 != "Mock") %>%
mutate(H2O2 = gsub(" mM", "", H2O2),
H2O2 = factor(H2O2) %>% fct_reorder(as.numeric(H2O2))) %>%
ggplot(aes(x = H2O2, y = r)) +
geom_point(aes(color = Genotype, shape = Date, group = Genotype),
size = 2, position = position_dodge(.5)) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("Survival rate") +
scale_color_brewer(palette = "Set2", direction = -1) +
facet_wrap(~Primary, nrow = 2) + theme_cowplot() +
background_grid(major = "y")
We will subset the data to include the 40 mM and 60 mM for WT and 100 mM for 3E
dat.7bp <- ungroup(dat.f7b) %>%
filter(H2O2 != "Mock") %>%
mutate(
H2O2 = gsub(" mM", "", H2O2),
H2O2 = factor(H2O2) %>% fct_reorder(as.numeric(H2O2)),
group = paste(str_sub(Genotype, 12, 13), H2O2, sep = "_")
) %>%
filter(H2O2 != "Mock", group %in% c("3E_100", "WT_60", "WT_40")) %>%
select(-Dilutions, -Count, -Species)
dat.7bs <- dat.7bp %>%
group_by(Date, Strain, Genotype, H2O2) %>% # form groups to apply the same MO/MM
mutate(
rM = r[Primary == "Mock"], # MO/MM for each group
ASR_Score = r/rM
) %>% ungroup() %>%
rename(rP = "r", r = "rM") %>%
filter(Primary != "Mock") %>%
relocate(c(r, ASR_Score), .after = "rP")
Main comparison will be between wt_40 and 3E_100
Summary statistics
dat.7bs %>%
#filter(secondary == "60/6mM") %>%
group_by(Genotype, group) %>%
summarize(
# smean.cl.boot returns a named vector. t() %>% as_tibble() turns it
# into a one-row tibble, stored as a list column. unnest() then flattens
# the list-column back out into regular columns
test = Hmisc::smean.cl.boot(as.double(ASR_Score)) %>% t() %>% as_tibble()
) %>%
unnest(test) %>%
mutate(across(where(is.numeric), round, digits = 2))
`summarise()` has grouped output by 'Genotype'. You can override using the `.groups` argument.
Wilcoxon signed-rank test comparing r’ and r (paired data)
dat.7bs %>%
select(Genotype, group, r, rP) %>%
nest(data = c(r, rP)) %>%
mutate(
test = map(data, ~ wilcox.test(.x$rP, .x$r, paired = TRUE, alternative = "g")),
tidied = map(test, tidy)
) %>%
unnest(tidied) %>%
select(Genotype, group, T = statistic, p.value, alternative) %>%
mutate(p.bonf = p.adjust(p.value, method = "bonf"),
p.holm = p.adjust(p.value, method = "holm"),
p.hoch = p.adjust(p.value, method = "hoch"),
across(starts_with("p."), round, digits = 5))
Paired t-tests comparing r’ and r
dat.7bs %>%
#filter(primary != "0Ni") %>%
select(Genotype, group, r, rP) %>%
nest(data = c(r, rP)) %>%
mutate(
test = map(data, ~ t.test(.x$rP, .x$r, paired = TRUE, alternative = "g")),
tidied = map(test, tidy)
) %>%
unnest(tidied) %>%
select(Genotype, group, p.value, alternative) %>%
mutate(P.adj = p.adjust(p.value, method = "BH"),
across(where(is.numeric), round, digits = 3))
In conclusion, we found significant ASR effect at 125 ng/mL rapamycin treatment in C. glabrata using either a Wilcoxon signed-rank test (nonparametric) or a paired t test, both using a 0.05 rejection threhold. We couldn’t reject the null hypothesis of no survival difference between rapamycin treated vs untreated samples for the 62.5 ng/mL concentration, and also not for the higher dose in S. cerevisiae
Goal
Determine if transcriptional factor (TF) msn4 and skn7 contribute to the ASR.
Rationale
Msn4 and Skn7 are stress responsive TFs regulating the Cta1-GFP induction during phosphate starvation. The reviewer is wondering if the loss of Msn4 or Skn7 affect the ASR for H2O2. To answer this quesion, Jinye performed the CFU assay for H2O2 ASR with a gradient of H2O2 concentrations for wt, msn4∆ and skn7∆.
Data
| Species | Strain | Genotype | H2O2 |
|---|---|---|---|
| C. glabrata | yH001, yH002 | wildtype 6 | 0, 80, 100 mM |
| C. glabrata | yH396, yH397 | msn4∆ 2 | 0, 40, 60 mM |
| C. glabrata | yH422, yH423 | skn7∆ 1 | 0, 20, 30, 40 mM |
Five replicates (at three [H2O2]) for wt and msn4∆, four replicates (at three [H2O2]) for skn7∆.
Jinye has reformatted her raw data and saved the output as a tsv. We will skip the code below and go straight to the reformatted version.
tmp1 <- read_tsv("../input/msn4orskn7_ko_ASR_raw.tsv", col_types = cols(), comment = "#") %>%
mutate(Date = gsub("(\\d\\d)(\\d\\d)(\\d\\d)", "\\1/\\2/\\3", Date)) %>%
mutate(H2O2 = recode(H2O2, 'Mock' = '0mM')) %>%
mutate(H2O2 = gsub("mM", " mM", H2O2)) %>%
select(-`MO/MM_percent`, -`PO/PM_percent`)
# data sanity check, quick view
sapply(select(tmp1, Date, Species, Strain, Genotype, Len_1, Len_2, H2O2), unique)
dat.sf <- tmp1 %>%
pivot_longer(c(Count_MO_MM, Count_PO_PM), names_to = "Primary", values_to = "Count") %>%
mutate(Primary = fct_recode(Primary, "Mock" = "Count_MO_MM", "-Pi" = "Count_PO_PM"),
scaled = Count * Dilutions * 1e-3) %>%
group_by(Date, Strain, Genotype, Primary) %>%
mutate(scaled_M = scaled[H2O2 == "0 mM"],
r = scaled / scaled_M) %>%
ungroup() %>%
select(Date, Species, Strain, Genotype, Primary, H2O2, Dilutions,
Count, scaled, scaled_M, r) %>%
arrange(Date, Genotype, Primary)
write_tsv(dat.sf, file = "../input/20230913-sf-data-JY.tsv")
dat.sf <- read_tsv("../input/20230913-sf-data-JY.tsv")
Rows: 112 Columns: 11── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (6): Date, Species, Strain, Genotype, Primary, H2O2
dbl (5): Dilutions, Count, scaled, scaled_M, r
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
filter out the skn7Δ 40mM treatment group, because the [H2O2] is too high according to the spotting assay result.
dat.sfp <- dat.sf %>%
filter(H2O2 != "0 mM", !(H2O2 == "40 mM" & Genotype == "skn7Δ")) %>%
mutate(
Genotype = factor(Genotype, levels = c("wt", "msn4Δ", "skn7Δ")),
group = paste(Genotype, gsub(" ", "", H2O2), sep = "_"),
Secondary =
factor(group,
levels = c("wt_100mM", "wt_80mM", "wt_60mM", "msn4Δ_60mM",
"msn4Δ_40mM", "msn4Δ_20mM",
"skn7Δ_30mM", "skn7Δ_20mM", "skn7Δ_10mM"),
labels = c("High", "Medium", "low", "High", "Medium",
"low", "Medium", "low", "bottom")),
Primary = factor(Primary, levels = c("Mock", "-Pi"))
)
with(dat.sfp, table(Date, group))
group
Date msn4Δ_20mM msn4Δ_40mM msn4Δ_60mM skn7Δ_10mM skn7Δ_20mM skn7Δ_30mM
d09/04/23 2 2 2 0 0 0
d09/05/23 2 2 2 2 2 0
d09/06/23 2 2 2 2 2 2
d09/07/23 2 2 2 2 2 2
d09/08/23 2 2 2 2 2 2
group
Date wt_100mM wt_60mM wt_80mM
d09/04/23 2 2 2
d09/05/23 2 2 2
d09/06/23 2 2 2
d09/07/23 2 2 2
d09/08/23 2 2 2
Plot the Medium group to include in the response letter
dat.sfp %>%
filter(Secondary == "Medium") %>%
ggplot(aes(x = H2O2, y = r)) + #p.survival[-3] +
geom_point(aes(shape = Primary), stroke = 1, size = 2,
position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.9)) +
stat_summary(aes(group = Primary), position = position_dodge(0.9),
fun = mean, fun.max = mean, fun.min = mean,
geom = "crossbar", color = "red", width = 0.5) +
facet_wrap(~ Genotype, nrow = 1, scales = "free_x") +
scale_shape_manual(values = c("Mock" = 1, "-Pi" = 16)) +
scale_y_continuous(labels = scales::percent) +
xlab(bquote(H[2]*O[2]~(mM))) + ylab("% survival") +
theme_bw(base_size = 14, base_line_size = 1) +
panel_border(color = "black", size = 1) +
theme(strip.text = element_text(size = rel(1), face = 3),
strip.background = element_blank())
ggsave("../output/20230913-msn4-skn7-ASR.png", width = 5, height = 3)
Statistical test for differences in basal survival between genotypes. I first applied the Kruskal-Wallis test, which is an extension of the Mann-Whitney’s U test for multiple groups and is equivalent to ANOVA but applied on ranks. I used this test to determine if there is evidence for differences in the basal survival rates among the four groups (2 genotypes x 2 [H2O2]).
tmp <- dat.sfp %>% filter(Secondary == "Medium", Primary == "Mock")
tmp %>% group_by(group) %>%
summarize(
test = Hmisc::smean.cl.normal(r) %>% t() %>% as_tibble()
) %>%
unnest(test) %>%
mutate(across(where(is.double), round, digits = 3))
kruskal.test(r ~ group, data = tmp)
Kruskal-Wallis rank sum test
data: r by group
Kruskal-Wallis chi-squared = 2.8835, df = 2, p-value = 0.2365
There are no significance differences in the basal survival rate in the Medium group across genotypes
Comparing the ASR effect in skn7Δ, msn4∆ and wild type using Kruskal-Wallis Rank Sum test.
tmp <- dat.sfp %>%
filter(Secondary == "Medium") %>%
pivot_wider(id_cols = c(Date, Strain, Genotype), names_from = Primary, values_from = r) %>%
mutate(ASR = `-Pi`/Mock) %>%
arrange(Genotype)
tmp %>% group_by(Genotype) %>%
summarize(ASR_score = paste(round(as.numeric(ASR),1), collapse = ", "),
mean = mean(ASR), sd = sd(ASR))
kruskal.test(as.numeric(ASR) ~ Genotype, data = tmp)
Kruskal-Wallis rank sum test
data: as.numeric(ASR) by Genotype
Kruskal-Wallis chi-squared = 2.3209, df = 2, p-value = 0.3133
pairwise.wilcox.test(x = as.numeric(tmp$ASR), g = tmp$Genotype, paired = FALSE, p.adjust.method = "holm")
Pairwise comparisons using Wilcoxon rank sum exact test
data: as.numeric(tmp$ASR) and tmp$Genotype
wt msn4Δ
msn4Δ 1.00 -
skn7Δ 0.75 0.75
P value adjustment method: holm
There is no significant difference in ASR among the three genotypes